/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.phonetic;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.regex.Pattern;
import org.apache.commons.codec.language.bm.Languages.LanguageSet;
import org.apache.commons.codec.language.bm.NameType;
import org.apache.commons.codec.language.bm.PhoneticEngine;
import org.apache.commons.codec.language.bm.RuleType;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.tests.analysis.MockTokenizer;

/** Tests {@link BeiderMorseFilter} */
public class TestBeiderMorseFilter extends BaseTokenStreamTestCase {
  /** Shared whitespace-tokenizing analyzer; created in {@link #setUp()}, closed in tearDown. */
  private Analyzer analyzer;

  /** Builds the engine configuration used by every test here: generic names, "exact" rules. */
  private static PhoneticEngine newExactGenericEngine() {
    return new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    analyzer =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(
                tokenizer, new BeiderMorseFilter(tokenizer, newExactGenericEngine()));
          }
        };
  }

  @Override
  public void tearDown() throws Exception {
    analyzer.close();
    super.tearDown();
  }

  /** generic, "exact" configuration */
  public void testBasicUsage() throws Exception {
    assertAnalyzesTo(
        analyzer,
        "Angelo",
        new String[] {"anZelo", "andZelo", "angelo", "anhelo", "anjelo", "anxelo"},
        new int[] {0, 0, 0, 0, 0, 0},
        new int[] {6, 6, 6, 6, 6, 6},
        new int[] {1, 0, 0, 0, 0, 0});

    assertAnalyzesTo(
        analyzer,
        "D'Angelo",
        new String[] {
          "anZelo",
          "andZelo",
          "angelo",
          "anhelo",
          "anjelo",
          "anxelo",
          "danZelo",
          "dandZelo",
          "dangelo",
          "danhelo",
          "danjelo",
          "danxelo"
        },
        new int[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        new int[] {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8},
        new int[] {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
  }

  /** restrict the output to a set of possible origin languages */
  public void testLanguageSet() throws Exception {
    // A plain HashSet instead of double-brace initialization: the latter creates an anonymous
    // subclass that keeps a hidden reference to this test instance.
    HashSet<String> languageNames = new HashSet<>();
    languageNames.add("italian");
    languageNames.add("greek");
    languageNames.add("spanish");
    final LanguageSet languages = LanguageSet.from(languageNames);

    // try-with-resources so the analyzer is closed even when the assertion below fails.
    try (Analyzer restricted =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(
                tokenizer, new BeiderMorseFilter(tokenizer, newExactGenericEngine(), languages));
          }
        }) {
      assertAnalyzesTo(
          restricted,
          "Angelo",
          new String[] {"andZelo", "angelo", "anxelo"},
          new int[] {0, 0, 0},
          new int[] {6, 6, 6},
          new int[] {1, 0, 0});
    }
  }

  /** for convenience, if the input yields no output, we pass it thru as-is */
  public void testNumbers() throws Exception {
    assertAnalyzesTo(
        analyzer, "1234", new String[] {"1234"}, new int[] {0}, new int[] {4}, new int[] {1});
  }

  /** runs the shared analyzer against randomly generated input */
  public void testRandom() throws Exception {
    checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
  }

  /** an empty keyword token is expected to come out as a single empty term */
  public void testEmptyTerm() throws IOException {
    // try-with-resources so the analyzer is closed even when checkOneTerm fails.
    try (Analyzer a =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new KeywordTokenizer();
            return new TokenStreamComponents(
                tokenizer, new BeiderMorseFilter(tokenizer, newExactGenericEngine()));
          }
        }) {
      checkOneTerm(a, "", "");
    }
  }

  /** the keyword flag set upstream must still be set on every token the filter emits */
  public void testCustomAttribute() throws IOException {
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
    tokenizer.setReader(new StringReader("D'Angelo"));
    // The ".*" pattern marks every incoming token as a keyword.
    TokenStream marked = new PatternKeywordMarkerFilter(tokenizer, Pattern.compile(".*"));

    // Closing the outermost filter closes the whole chain; try-with-resources guarantees that
    // happens even when an assertion inside the loop fails.
    try (TokenStream stream = new BeiderMorseFilter(marked, newExactGenericEngine())) {
      KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
      stream.reset();
      int tokenCount = 0;
      while (stream.incrementToken()) {
        assertTrue(keyAtt.isKeyword());
        tokenCount++;
      }
      // Finish the consumer workflow before checking the count, so the stream has been fully
      // consumed and ended before any count failure is raised.
      stream.end();
      assertEquals(12, tokenCount);
    }
  }
}
